package com.etoak.crawl;


import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

/**
 * Holds the data of one fetched response: the raw body bytes, the URL it was
 * fetched from and its Content-Type, plus a lazily decoded HTML string and a
 * lazily parsed jsoup {@link Document}.
 *
 * <p>The lazy initialisation in {@link #getHtml()} and {@link #getDoc()} is not
 * synchronized.
 *
 * @author sam
 */
public class Page {

    /**
     * Charset name used when the Content-Type declares none, or declares one
     * that this JVM cannot decode.
     */
    private static final String DEFAULT_CHARSET = "utf-8";

    /**
     * Name of the Content-Type parameter that carries the charset, including the '='.
     */
    private static final String CHARSET_PARAM = "charset=";

    /**
     * Page body as raw bytes.
     */
    private final byte[] content;
    /**
     * URL of the page.
     */
    private final String url;
    /**
     * Content type of the response.
     */
    private final String contentType;
    /**
     * Page source as a string; decoded on first call to {@link #getHtml()}.
     */
    private String html;
    /**
     * Parsed DOM of the page; built on first call to {@link #getDoc()}.
     */
    private Document doc;
    /**
     * Character encoding name: the one declared in the Content-Type, or the one
     * actually used once the body has been decoded.
     */
    private String charset;


    /**
     * Creates a page from a fetched response.
     *
     * @param content     raw body bytes; may be {@code null}. The array is stored as-is, not copied.
     * @param url         URL of the page; used as the base URI when parsing the document
     * @param contentType value of the Content-Type header; may be {@code null}. A
     *                    {@code charset=} parameter, if present, selects the decoding charset.
     */
    public Page(byte[] content, String url, String contentType) {
        this.content = content;
        this.url = url;
        this.contentType = contentType;
        this.charset = parseCharset(contentType);
    }

    /**
     * Extracts the value of the {@code charset=} parameter from a Content-Type value.
     *
     * @param contentType Content-Type value such as {@code text/html; charset=GBK}; may be {@code null}
     * @return the declared charset name with surrounding quotes removed, or {@code null} if none is declared
     */
    private static String parseCharset(String contentType) {
        if (contentType == null) {
            return null;
        }
        for (String part : contentType.split(";")) {
            String param = part.trim();
            // Parameter names are matched case-insensitively ("Charset=", "CHARSET=", ...).
            if (!param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                continue;
            }
            String value = param.substring(CHARSET_PARAM.length()).trim();
            if (value.length() >= 2) {
                char first = value.charAt(0);
                char last = value.charAt(value.length() - 1);
                // The value may be quoted: charset="utf-8".
                if (first == last && (first == '"' || first == '\'')) {
                    value = value.substring(1, value.length() - 1).trim();
                }
            }
            return value.isEmpty() ? null : value;
        }
        return null;
    }

    /**
     * Returns the charset name of this page.
     *
     * @return the charset declared in the Content-Type; if none was declared,
     *         {@code null} until {@link #getHtml()} has decoded the body, and the
     *         charset actually used afterwards
     */
    public String getCharset() {
        return charset;
    }

    /**
     * @return the URL passed to the constructor
     */
    public String getUrl() {
        return url;
    }

    /**
     * @return the Content-Type value passed to the constructor
     */
    public String getContentType() {
        return contentType;
    }

    /**
     * @return the body byte array passed to the constructor (the same array, not a copy)
     */
    public byte[] getContent() {
        return content;
    }

    /**
     * Returns the page source as a string, decoding the body on first use.
     *
     * <p>The body is decoded with the charset declared in the Content-Type. If none
     * is declared, or the declared one is not supported by this JVM, UTF-8 is used
     * and {@link #getCharset()} reports {@code "utf-8"} afterwards.
     *
     * @return the page source, or {@code null} if the page has no content
     */
    public String getHtml() {
        if (html != null) {
            return html;
        }
        if (content == null) {
            return null;
        }
        if (charset == null) {
            charset = DEFAULT_CHARSET;
        }
        try {
            html = new String(content, charset);
        } catch (UnsupportedEncodingException ex) {
            // The server declared a charset this JVM does not know; a best-effort
            // UTF-8 decode is more useful to callers than no text at all.
            charset = DEFAULT_CHARSET;
            html = new String(content, StandardCharsets.UTF_8);
        }
        return html;
    }


    /**
     * Returns the page parsed as a jsoup document, parsing on first use.
     *
     * @return the parsed document, or {@code null} if the page has no content or parsing fails
     */
    public Document getDoc() {
        if (doc != null) {
            return doc;
        }
        String source = getHtml();
        if (source == null) {
            // Nothing to parse; avoid handing null to jsoup.
            return null;
        }
        try {
            // An empty base URI means relative links are simply left unresolved.
            doc = Jsoup.parse(source, url == null ? "" : url);
            return doc;
        } catch (RuntimeException ex) {
            ex.printStackTrace();
            return null;
        }
    }


}
